{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "import lightgbm as lgb\n",
    "import numpy as np\n",
    "import sklearn.datasets\n",
    "from sklearn import metrics\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "\n",
    "# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;\n",
    "# sklearn.cross_validation was removed in 0.20, so it is kept only as a\n",
    "# fallback for older installations.\n",
    "try:\n",
    "    from sklearn.model_selection import train_test_split\n",
    "except ImportError:\n",
    "    from sklearn.cross_validation import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.0.10'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the installed LightGBM version as the cell output.\n",
    "lgb.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def clearstring(string):\n",
    "    \"\"\"Normalise one line of text to lowercase ASCII words.\n",
    "\n",
    "    Every character other than A-Z, a-z and the space is deleted, the\n",
    "    remaining words are joined with single spaces, and the result is\n",
    "    returned in lowercase.\n",
    "    \"\"\"\n",
    "    letters_only = re.sub('[^A-Za-z ]+', '', string)\n",
    "    # Only letters and spaces survive the substitution, so a plain split()\n",
    "    # yields exactly the non-empty, space-separated words.\n",
    "    words = letters_only.split()\n",
    "    return ' '.join(words).lower()\n",
    "\n",
    "# sklearn.datasets loads each document as a single element,\n",
    "# so every document is split into its individual lines here\n",
    "def separate_dataset(trainset):\n",
    "    \"\"\"Flatten a loaded dataset into per-line samples.\n",
    "\n",
    "    Each entry of ``trainset.data`` is split on newlines; empty lines are\n",
    "    dropped and the rest are passed through ``clearstring``. Every resulting\n",
    "    line is paired with the ``trainset.target`` value of the entry it came\n",
    "    from.\n",
    "\n",
    "    Returns a ``(texts, labels)`` tuple of two equally long lists.\n",
    "    \"\"\"\n",
    "    texts, labels = [], []\n",
    "    for index, document in enumerate(trainset.data):\n",
    "        cleaned = [clearstring(line) for line in document.split('\\n') if line]\n",
    "        texts.extend(cleaned)\n",
    "        # one label per produced line, taken from the document's own target\n",
    "        labels.extend(trainset.target[index] for _ in cleaned)\n",
    "    return texts, labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']\n",
      "416809\n",
      "416809\n"
     ]
    }
   ],
   "source": [
    "# the encoding can be changed to match the files under 'data'\n",
    "trainset = sklearn.datasets.load_files(container_path='data', encoding='UTF-8')\n",
    "# replace the per-file samples with per-line samples and their labels\n",
    "trainset.data, trainset.target = separate_dataset(trainset)\n",
    "# class names, then the number of samples and the number of labels\n",
    "print(trainset.target_names, len(trainset.data), len(trainset.target), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# min_df=10: ignore terms that appear in fewer than 10 documents (here: lines).\n",
    "# NOTE(review): the vectorizer is fitted on the full dataset before the\n",
    "# train/test split, so its statistics also cover the held-out rows.\n",
    "tfidf = TfidfVectorizer(min_df=10)\n",
    "tfidf = tfidf.fit(trainset.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vectorize every line with the fitted TF-IDF model; `out` is the feature matrix that gets split below.\n",
    "out = tfidf.transform(trainset.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# convert the label list to a NumPy array before splitting\n",
    "trainset.target = np.array(trainset.target)\n",
    "# A fixed random_state makes the 80/20 split, and every metric computed\n",
    "# from it, reproducible across kernel restarts.\n",
    "train_X, test_X, train_Y, test_Y = train_test_split(\n",
    "    out, trainset.target, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/lightgbm/basic.py:642: UserWarning: max_bin keyword has been found in `params` and will be ignored. Please use max_bin argument of the Dataset constructor to pass this parameter.\n",
      "  'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))\n",
      "/usr/local/lib/python3.5/dist-packages/lightgbm/basic.py:648: LGBMDeprecationWarning: The `max_bin` parameter is deprecated and will be removed in 2.0.12 version. Please use `params` to pass this parameter.\n",
      "  'Please use `params` to pass this parameter.', LGBMDeprecationWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\tvalid_0's multi_logloss: 1.71011\tvalid_1's multi_logloss: 1.71044\n",
      "Training until validation scores don't improve for 20 rounds.\n",
      "[2]\tvalid_0's multi_logloss: 1.62928\tvalid_1's multi_logloss: 1.62947\n",
      "[3]\tvalid_0's multi_logloss: 1.56092\tvalid_1's multi_logloss: 1.56093\n",
      "[4]\tvalid_0's multi_logloss: 1.4939\tvalid_1's multi_logloss: 1.4937\n",
      "[5]\tvalid_0's multi_logloss: 1.43481\tvalid_1's multi_logloss: 1.43446\n",
      "[6]\tvalid_0's multi_logloss: 1.38164\tvalid_1's multi_logloss: 1.38169\n",
      "[7]\tvalid_0's multi_logloss: 1.33566\tvalid_1's multi_logloss: 1.33626\n",
      "[8]\tvalid_0's multi_logloss: 1.35761\tvalid_1's multi_logloss: 1.35815\n",
      "[9]\tvalid_0's multi_logloss: 1.30537\tvalid_1's multi_logloss: 1.30536\n",
      "[10]\tvalid_0's multi_logloss: 1.2587\tvalid_1's multi_logloss: 1.25924\n",
      "[11]\tvalid_0's multi_logloss: 1.21731\tvalid_1's multi_logloss: 1.21784\n",
      "[12]\tvalid_0's multi_logloss: 1.23303\tvalid_1's multi_logloss: 1.23349\n",
      "[13]\tvalid_0's multi_logloss: 1.1928\tvalid_1's multi_logloss: 1.19306\n",
      "[14]\tvalid_0's multi_logloss: 1.15426\tvalid_1's multi_logloss: 1.15454\n",
      "[15]\tvalid_0's multi_logloss: 1.12207\tvalid_1's multi_logloss: 1.12239\n",
      "[16]\tvalid_0's multi_logloss: 1.08935\tvalid_1's multi_logloss: 1.08975\n",
      "[17]\tvalid_0's multi_logloss: 1.06299\tvalid_1's multi_logloss: 1.06341\n",
      "[18]\tvalid_0's multi_logloss: 1.03549\tvalid_1's multi_logloss: 1.03586\n",
      "[19]\tvalid_0's multi_logloss: 1.00738\tvalid_1's multi_logloss: 1.008\n",
      "[20]\tvalid_0's multi_logloss: 0.980773\tvalid_1's multi_logloss: 0.98122\n",
      "[21]\tvalid_0's multi_logloss: 0.989493\tvalid_1's multi_logloss: 0.990111\n",
      "[22]\tvalid_0's multi_logloss: 0.965328\tvalid_1's multi_logloss: 0.965789\n",
      "[23]\tvalid_0's multi_logloss: 0.940388\tvalid_1's multi_logloss: 0.940901\n",
      "[24]\tvalid_0's multi_logloss: 0.91884\tvalid_1's multi_logloss: 0.919423\n",
      "[25]\tvalid_0's multi_logloss: 0.89962\tvalid_1's multi_logloss: 0.900226\n",
      "[26]\tvalid_0's multi_logloss: 0.879377\tvalid_1's multi_logloss: 0.88001\n",
      "[27]\tvalid_0's multi_logloss: 0.859865\tvalid_1's multi_logloss: 0.860581\n",
      "[28]\tvalid_0's multi_logloss: 0.867049\tvalid_1's multi_logloss: 0.867738\n",
      "[29]\tvalid_0's multi_logloss: 0.848184\tvalid_1's multi_logloss: 0.848958\n",
      "[30]\tvalid_0's multi_logloss: 0.831817\tvalid_1's multi_logloss: 0.832629\n",
      "[31]\tvalid_0's multi_logloss: 0.834834\tvalid_1's multi_logloss: 0.835667\n",
      "[32]\tvalid_0's multi_logloss: 0.820154\tvalid_1's multi_logloss: 0.821035\n",
      "[33]\tvalid_0's multi_logloss: 0.803995\tvalid_1's multi_logloss: 0.804906\n",
      "[34]\tvalid_0's multi_logloss: 0.78845\tvalid_1's multi_logloss: 0.789479\n",
      "[35]\tvalid_0's multi_logloss: 0.796809\tvalid_1's multi_logloss: 0.797851\n",
      "[36]\tvalid_0's multi_logloss: 0.812434\tvalid_1's multi_logloss: 0.813509\n",
      "[37]\tvalid_0's multi_logloss: 0.796619\tvalid_1's multi_logloss: 0.797556\n",
      "[38]\tvalid_0's multi_logloss: 0.781902\tvalid_1's multi_logloss: 0.782891\n",
      "[39]\tvalid_0's multi_logloss: 0.766531\tvalid_1's multi_logloss: 0.767559\n",
      "[40]\tvalid_0's multi_logloss: 0.76399\tvalid_1's multi_logloss: 0.765049\n",
      "[41]\tvalid_0's multi_logloss: 0.774081\tvalid_1's multi_logloss: 0.775101\n",
      "[42]\tvalid_0's multi_logloss: 0.757558\tvalid_1's multi_logloss: 0.758583\n",
      "[43]\tvalid_0's multi_logloss: 0.766375\tvalid_1's multi_logloss: 0.767377\n",
      "[44]\tvalid_0's multi_logloss: 0.750344\tvalid_1's multi_logloss: 0.751435\n",
      "[45]\tvalid_0's multi_logloss: 0.738061\tvalid_1's multi_logloss: 0.739142\n",
      "[46]\tvalid_0's multi_logloss: 0.753949\tvalid_1's multi_logloss: 0.754967\n",
      "[47]\tvalid_0's multi_logloss: 0.740357\tvalid_1's multi_logloss: 0.74136\n",
      "[48]\tvalid_0's multi_logloss: 0.739947\tvalid_1's multi_logloss: 0.740991\n",
      "[49]\tvalid_0's multi_logloss: 0.75059\tvalid_1's multi_logloss: 0.751681\n",
      "[50]\tvalid_0's multi_logloss: 0.761188\tvalid_1's multi_logloss: 0.762205\n",
      "[51]\tvalid_0's multi_logloss: 0.747511\tvalid_1's multi_logloss: 0.74856\n",
      "[52]\tvalid_0's multi_logloss: 0.73484\tvalid_1's multi_logloss: 0.736026\n",
      "[53]\tvalid_0's multi_logloss: 0.74288\tvalid_1's multi_logloss: 0.744039\n",
      "[54]\tvalid_0's multi_logloss: 0.729507\tvalid_1's multi_logloss: 0.730777\n",
      "[55]\tvalid_0's multi_logloss: 0.716931\tvalid_1's multi_logloss: 0.718091\n",
      "[56]\tvalid_0's multi_logloss: 0.728822\tvalid_1's multi_logloss: 0.729943\n",
      "[57]\tvalid_0's multi_logloss: 0.715612\tvalid_1's multi_logloss: 0.716777\n",
      "[58]\tvalid_0's multi_logloss: 0.728277\tvalid_1's multi_logloss: 0.729478\n",
      "[59]\tvalid_0's multi_logloss: 0.740448\tvalid_1's multi_logloss: 0.741698\n",
      "[60]\tvalid_0's multi_logloss: 0.726418\tvalid_1's multi_logloss: 0.727687\n",
      "[61]\tvalid_0's multi_logloss: 0.735796\tvalid_1's multi_logloss: 0.737004\n",
      "[62]\tvalid_0's multi_logloss: 0.721296\tvalid_1's multi_logloss: 0.722471\n",
      "[63]\tvalid_0's multi_logloss: 0.708207\tvalid_1's multi_logloss: 0.709465\n",
      "[64]\tvalid_0's multi_logloss: 0.717114\tvalid_1's multi_logloss: 0.718406\n",
      "[65]\tvalid_0's multi_logloss: 0.724574\tvalid_1's multi_logloss: 0.725843\n",
      "[66]\tvalid_0's multi_logloss: 0.711847\tvalid_1's multi_logloss: 0.713183\n",
      "[67]\tvalid_0's multi_logloss: 0.699594\tvalid_1's multi_logloss: 0.701117\n",
      "[68]\tvalid_0's multi_logloss: 0.687897\tvalid_1's multi_logloss: 0.689411\n",
      "[69]\tvalid_0's multi_logloss: 0.696399\tvalid_1's multi_logloss: 0.697839\n",
      "[70]\tvalid_0's multi_logloss: 0.700264\tvalid_1's multi_logloss: 0.701673\n",
      "[71]\tvalid_0's multi_logloss: 0.708423\tvalid_1's multi_logloss: 0.709902\n",
      "[72]\tvalid_0's multi_logloss: 0.695702\tvalid_1's multi_logloss: 0.697287\n",
      "[73]\tvalid_0's multi_logloss: 0.683801\tvalid_1's multi_logloss: 0.68547\n",
      "[74]\tvalid_0's multi_logloss: 0.690084\tvalid_1's multi_logloss: 0.691725\n",
      "[75]\tvalid_0's multi_logloss: 0.678239\tvalid_1's multi_logloss: 0.679841\n",
      "[76]\tvalid_0's multi_logloss: 0.679088\tvalid_1's multi_logloss: 0.680646\n",
      "[77]\tvalid_0's multi_logloss: 0.684491\tvalid_1's multi_logloss: 0.686052\n",
      "[78]\tvalid_0's multi_logloss: 0.691559\tvalid_1's multi_logloss: 0.693184\n",
      "[79]\tvalid_0's multi_logloss: 0.680409\tvalid_1's multi_logloss: 0.682051\n",
      "[80]\tvalid_0's multi_logloss: 0.670088\tvalid_1's multi_logloss: 0.671706\n",
      "[81]\tvalid_0's multi_logloss: 0.676803\tvalid_1's multi_logloss: 0.678429\n",
      "[82]\tvalid_0's multi_logloss: 0.666873\tvalid_1's multi_logloss: 0.66845\n",
      "[83]\tvalid_0's multi_logloss: 0.674259\tvalid_1's multi_logloss: 0.675866\n",
      "[84]\tvalid_0's multi_logloss: 0.681078\tvalid_1's multi_logloss: 0.682693\n",
      "[85]\tvalid_0's multi_logloss: 0.688343\tvalid_1's multi_logloss: 0.689952\n",
      "[86]\tvalid_0's multi_logloss: 0.676272\tvalid_1's multi_logloss: 0.677889\n",
      "[87]\tvalid_0's multi_logloss: 0.66416\tvalid_1's multi_logloss: 0.665723\n",
      "[88]\tvalid_0's multi_logloss: 0.67239\tvalid_1's multi_logloss: 0.673944\n",
      "[89]\tvalid_0's multi_logloss: 0.678164\tvalid_1's multi_logloss: 0.679712\n",
      "[90]\tvalid_0's multi_logloss: 0.685284\tvalid_1's multi_logloss: 0.686821\n",
      "[91]\tvalid_0's multi_logloss: 0.692204\tvalid_1's multi_logloss: 0.693744\n",
      "[92]\tvalid_0's multi_logloss: 0.680233\tvalid_1's multi_logloss: 0.681793\n",
      "[93]\tvalid_0's multi_logloss: 0.668048\tvalid_1's multi_logloss: 0.669636\n",
      "[94]\tvalid_0's multi_logloss: 0.674307\tvalid_1's multi_logloss: 0.675851\n",
      "[95]\tvalid_0's multi_logloss: 0.679994\tvalid_1's multi_logloss: 0.681506\n",
      "[96]\tvalid_0's multi_logloss: 0.685611\tvalid_1's multi_logloss: 0.687111\n",
      "[97]\tvalid_0's multi_logloss: 0.673427\tvalid_1's multi_logloss: 0.675009\n",
      "[98]\tvalid_0's multi_logloss: 0.679963\tvalid_1's multi_logloss: 0.68157\n",
      "[99]\tvalid_0's multi_logloss: 0.669534\tvalid_1's multi_logloss: 0.671087\n",
      "[100]\tvalid_0's multi_logloss: 0.658184\tvalid_1's multi_logloss: 0.659713\n",
      "[101]\tvalid_0's multi_logloss: 0.66712\tvalid_1's multi_logloss: 0.668633\n",
      "[102]\tvalid_0's multi_logloss: 0.673044\tvalid_1's multi_logloss: 0.674521\n",
      "[103]\tvalid_0's multi_logloss: 0.679652\tvalid_1's multi_logloss: 0.681111\n",
      "[104]\tvalid_0's multi_logloss: 0.668636\tvalid_1's multi_logloss: 0.670057\n",
      "[105]\tvalid_0's multi_logloss: 0.676431\tvalid_1's multi_logloss: 0.677863\n",
      "[106]\tvalid_0's multi_logloss: 0.683205\tvalid_1's multi_logloss: 0.684649\n",
      "[107]\tvalid_0's multi_logloss: 0.690396\tvalid_1's multi_logloss: 0.691862\n",
      "[108]\tvalid_0's multi_logloss: 0.678238\tvalid_1's multi_logloss: 0.679673\n",
      "[109]\tvalid_0's multi_logloss: 0.683773\tvalid_1's multi_logloss: 0.685225\n",
      "[110]\tvalid_0's multi_logloss: 0.690183\tvalid_1's multi_logloss: 0.691617\n",
      "[111]\tvalid_0's multi_logloss: 0.677701\tvalid_1's multi_logloss: 0.679162\n",
      "[112]\tvalid_0's multi_logloss: 0.666247\tvalid_1's multi_logloss: 0.667837\n",
      "[113]\tvalid_0's multi_logloss: 0.655061\tvalid_1's multi_logloss: 0.656693\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[114]\tvalid_0's multi_logloss: 0.644173\tvalid_1's multi_logloss: 0.645697\n",
      "[115]\tvalid_0's multi_logloss: 0.633144\tvalid_1's multi_logloss: 0.634696\n",
      "[116]\tvalid_0's multi_logloss: 0.622581\tvalid_1's multi_logloss: 0.624198\n",
      "[117]\tvalid_0's multi_logloss: 0.627606\tvalid_1's multi_logloss: 0.629208\n",
      "[118]\tvalid_0's multi_logloss: 0.618894\tvalid_1's multi_logloss: 0.620438\n",
      "[119]\tvalid_0's multi_logloss: 0.62397\tvalid_1's multi_logloss: 0.625505\n",
      "[120]\tvalid_0's multi_logloss: 0.630002\tvalid_1's multi_logloss: 0.631532\n",
      "[121]\tvalid_0's multi_logloss: 0.635848\tvalid_1's multi_logloss: 0.637364\n",
      "[122]\tvalid_0's multi_logloss: 0.642156\tvalid_1's multi_logloss: 0.643655\n",
      "[123]\tvalid_0's multi_logloss: 0.63234\tvalid_1's multi_logloss: 0.633817\n",
      "[124]\tvalid_0's multi_logloss: 0.623056\tvalid_1's multi_logloss: 0.624474\n",
      "[125]\tvalid_0's multi_logloss: 0.628607\tvalid_1's multi_logloss: 0.63002\n",
      "[126]\tvalid_0's multi_logloss: 0.63363\tvalid_1's multi_logloss: 0.635061\n",
      "[127]\tvalid_0's multi_logloss: 0.624202\tvalid_1's multi_logloss: 0.625579\n",
      "[128]\tvalid_0's multi_logloss: 0.629297\tvalid_1's multi_logloss: 0.630669\n",
      "[129]\tvalid_0's multi_logloss: 0.619817\tvalid_1's multi_logloss: 0.621144\n",
      "[130]\tvalid_0's multi_logloss: 0.610993\tvalid_1's multi_logloss: 0.61241\n",
      "[131]\tvalid_0's multi_logloss: 0.615002\tvalid_1's multi_logloss: 0.616414\n",
      "[132]\tvalid_0's multi_logloss: 0.605941\tvalid_1's multi_logloss: 0.607346\n",
      "[133]\tvalid_0's multi_logloss: 0.597734\tvalid_1's multi_logloss: 0.599269\n",
      "[134]\tvalid_0's multi_logloss: 0.60344\tvalid_1's multi_logloss: 0.604994\n",
      "[135]\tvalid_0's multi_logloss: 0.594691\tvalid_1's multi_logloss: 0.596191\n",
      "[136]\tvalid_0's multi_logloss: 0.601493\tvalid_1's multi_logloss: 0.602966\n",
      "[137]\tvalid_0's multi_logloss: 0.606537\tvalid_1's multi_logloss: 0.60801\n",
      "[138]\tvalid_0's multi_logloss: 0.5988\tvalid_1's multi_logloss: 0.600352\n",
      "[139]\tvalid_0's multi_logloss: 0.590093\tvalid_1's multi_logloss: 0.591635\n",
      "[140]\tvalid_0's multi_logloss: 0.59591\tvalid_1's multi_logloss: 0.597474\n",
      "[141]\tvalid_0's multi_logloss: 0.60063\tvalid_1's multi_logloss: 0.602203\n",
      "[142]\tvalid_0's multi_logloss: 0.60549\tvalid_1's multi_logloss: 0.607062\n",
      "[143]\tvalid_0's multi_logloss: 0.594903\tvalid_1's multi_logloss: 0.596499\n",
      "[144]\tvalid_0's multi_logloss: 0.599103\tvalid_1's multi_logloss: 0.600714\n",
      "[145]\tvalid_0's multi_logloss: 0.591151\tvalid_1's multi_logloss: 0.59279\n",
      "[146]\tvalid_0's multi_logloss: 0.597179\tvalid_1's multi_logloss: 0.598813\n",
      "[147]\tvalid_0's multi_logloss: 0.602232\tvalid_1's multi_logloss: 0.603859\n",
      "[148]\tvalid_0's multi_logloss: 0.607834\tvalid_1's multi_logloss: 0.609463\n",
      "[149]\tvalid_0's multi_logloss: 0.599043\tvalid_1's multi_logloss: 0.600694\n",
      "[150]\tvalid_0's multi_logloss: 0.590497\tvalid_1's multi_logloss: 0.592066\n",
      "[151]\tvalid_0's multi_logloss: 0.596437\tvalid_1's multi_logloss: 0.597991\n",
      "[152]\tvalid_0's multi_logloss: 0.601664\tvalid_1's multi_logloss: 0.603227\n",
      "[153]\tvalid_0's multi_logloss: 0.593667\tvalid_1's multi_logloss: 0.595155\n",
      "[154]\tvalid_0's multi_logloss: 0.597779\tvalid_1's multi_logloss: 0.599265\n",
      "[155]\tvalid_0's multi_logloss: 0.589556\tvalid_1's multi_logloss: 0.590941\n",
      "[156]\tvalid_0's multi_logloss: 0.594733\tvalid_1's multi_logloss: 0.596104\n",
      "[157]\tvalid_0's multi_logloss: 0.597508\tvalid_1's multi_logloss: 0.598874\n",
      "[158]\tvalid_0's multi_logloss: 0.602532\tvalid_1's multi_logloss: 0.603902\n",
      "[159]\tvalid_0's multi_logloss: 0.607513\tvalid_1's multi_logloss: 0.60889\n",
      "[160]\tvalid_0's multi_logloss: 0.612626\tvalid_1's multi_logloss: 0.614013\n",
      "[161]\tvalid_0's multi_logloss: 0.603282\tvalid_1's multi_logloss: 0.604584\n",
      "[162]\tvalid_0's multi_logloss: 0.607322\tvalid_1's multi_logloss: 0.608611\n",
      "[163]\tvalid_0's multi_logloss: 0.598953\tvalid_1's multi_logloss: 0.600317\n",
      "[164]\tvalid_0's multi_logloss: 0.591284\tvalid_1's multi_logloss: 0.59267\n",
      "[165]\tvalid_0's multi_logloss: 0.582764\tvalid_1's multi_logloss: 0.584242\n",
      "[166]\tvalid_0's multi_logloss: 0.574874\tvalid_1's multi_logloss: 0.576342\n",
      "[167]\tvalid_0's multi_logloss: 0.567118\tvalid_1's multi_logloss: 0.568678\n",
      "[168]\tvalid_0's multi_logloss: 0.559482\tvalid_1's multi_logloss: 0.561085\n",
      "[169]\tvalid_0's multi_logloss: 0.551829\tvalid_1's multi_logloss: 0.553409\n",
      "[170]\tvalid_0's multi_logloss: 0.55642\tvalid_1's multi_logloss: 0.558006\n",
      "[171]\tvalid_0's multi_logloss: 0.549616\tvalid_1's multi_logloss: 0.551244\n",
      "[172]\tvalid_0's multi_logloss: 0.554022\tvalid_1's multi_logloss: 0.555644\n",
      "[173]\tvalid_0's multi_logloss: 0.546764\tvalid_1's multi_logloss: 0.548434\n",
      "[174]\tvalid_0's multi_logloss: 0.539308\tvalid_1's multi_logloss: 0.540875\n",
      "[175]\tvalid_0's multi_logloss: 0.531885\tvalid_1's multi_logloss: 0.533467\n",
      "[176]\tvalid_0's multi_logloss: 0.53532\tvalid_1's multi_logloss: 0.536908\n",
      "[177]\tvalid_0's multi_logloss: 0.539522\tvalid_1's multi_logloss: 0.541109\n",
      "[178]\tvalid_0's multi_logloss: 0.543814\tvalid_1's multi_logloss: 0.545399\n",
      "[179]\tvalid_0's multi_logloss: 0.536675\tvalid_1's multi_logloss: 0.538153\n",
      "[180]\tvalid_0's multi_logloss: 0.540443\tvalid_1's multi_logloss: 0.541924\n",
      "[181]\tvalid_0's multi_logloss: 0.53392\tvalid_1's multi_logloss: 0.535493\n",
      "[182]\tvalid_0's multi_logloss: 0.527502\tvalid_1's multi_logloss: 0.529112\n",
      "[183]\tvalid_0's multi_logloss: 0.520657\tvalid_1's multi_logloss: 0.522295\n",
      "[184]\tvalid_0's multi_logloss: 0.514242\tvalid_1's multi_logloss: 0.515941\n",
      "[185]\tvalid_0's multi_logloss: 0.518845\tvalid_1's multi_logloss: 0.520543\n",
      "[186]\tvalid_0's multi_logloss: 0.512265\tvalid_1's multi_logloss: 0.514025\n",
      "[187]\tvalid_0's multi_logloss: 0.505579\tvalid_1's multi_logloss: 0.507274\n",
      "[188]\tvalid_0's multi_logloss: 0.499514\tvalid_1's multi_logloss: 0.501211\n",
      "[189]\tvalid_0's multi_logloss: 0.503781\tvalid_1's multi_logloss: 0.505477\n",
      "[190]\tvalid_0's multi_logloss: 0.507884\tvalid_1's multi_logloss: 0.50959\n",
      "[191]\tvalid_0's multi_logloss: 0.50179\tvalid_1's multi_logloss: 0.503537\n",
      "[192]\tvalid_0's multi_logloss: 0.496611\tvalid_1's multi_logloss: 0.498486\n",
      "[193]\tvalid_0's multi_logloss: 0.49119\tvalid_1's multi_logloss: 0.493192\n",
      "[194]\tvalid_0's multi_logloss: 0.494802\tvalid_1's multi_logloss: 0.496802\n",
      "[195]\tvalid_0's multi_logloss: 0.498105\tvalid_1's multi_logloss: 0.500094\n",
      "[196]\tvalid_0's multi_logloss: 0.492784\tvalid_1's multi_logloss: 0.494765\n",
      "[197]\tvalid_0's multi_logloss: 0.495679\tvalid_1's multi_logloss: 0.497665\n",
      "[198]\tvalid_0's multi_logloss: 0.499192\tvalid_1's multi_logloss: 0.501165\n",
      "[199]\tvalid_0's multi_logloss: 0.493411\tvalid_1's multi_logloss: 0.495425\n",
      "[200]\tvalid_0's multi_logloss: 0.497546\tvalid_1's multi_logloss: 0.499559\n",
      "[201]\tvalid_0's multi_logloss: 0.501178\tvalid_1's multi_logloss: 0.503182\n",
      "[202]\tvalid_0's multi_logloss: 0.504878\tvalid_1's multi_logloss: 0.506866\n",
      "[203]\tvalid_0's multi_logloss: 0.498813\tvalid_1's multi_logloss: 0.500771\n",
      "[204]\tvalid_0's multi_logloss: 0.50187\tvalid_1's multi_logloss: 0.503813\n",
      "[205]\tvalid_0's multi_logloss: 0.505529\tvalid_1's multi_logloss: 0.507468\n",
      "[206]\tvalid_0's multi_logloss: 0.499702\tvalid_1's multi_logloss: 0.501634\n",
      "[207]\tvalid_0's multi_logloss: 0.503338\tvalid_1's multi_logloss: 0.50528\n",
      "[208]\tvalid_0's multi_logloss: 0.496933\tvalid_1's multi_logloss: 0.498919\n",
      "[209]\tvalid_0's multi_logloss: 0.500754\tvalid_1's multi_logloss: 0.502732\n",
      "[210]\tvalid_0's multi_logloss: 0.504278\tvalid_1's multi_logloss: 0.506254\n",
      "[211]\tvalid_0's multi_logloss: 0.50762\tvalid_1's multi_logloss: 0.509593\n",
      "[212]\tvalid_0's multi_logloss: 0.501575\tvalid_1's multi_logloss: 0.503581\n",
      "[213]\tvalid_0's multi_logloss: 0.495678\tvalid_1's multi_logloss: 0.497712\n",
      "Early stopping, best iteration is:\n",
      "[193]\tvalid_0's multi_logloss: 0.49119\tvalid_1's multi_logloss: 0.493192\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMClassifier(boosting_type='dart', colsample_bytree=0.4, device='gpu',\n",
       "        learning_rate=0.1, max_bin=255, max_depth=-1, min_child_samples=20,\n",
       "        min_child_weight=0.001, min_split_gain=0.0, n_estimators=10000,\n",
       "        n_jobs=-1, num_leaves=31, objective='multiclass',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0005, silent=False,\n",
       "        subsample=0.8, subsample_for_bin=200000, subsample_freq=1)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): device='gpu' presumably needs a GPU-enabled LightGBM build;\n",
    "# confirm, or switch to 'cpu', before running elsewhere.\n",
    "params_lgd = dict(\n",
    "    boosting_type='dart',\n",
    "    objective='multiclass',\n",
    "    colsample_bytree=0.4,\n",
    "    subsample=0.8,\n",
    "    learning_rate=0.1,\n",
    "    silent=False,\n",
    "    n_estimators=10000,\n",
    "    reg_lambda=0.0005,\n",
    "    device='gpu'\n",
    ")\n",
    "clf = lgb.LGBMClassifier(**params_lgd)\n",
    "# eval_set order: first the training split, then the held-out split.\n",
    "# NOTE(review): the held-out split is monitored for early stopping here and is\n",
    "# also the split scored in the evaluation cell, so that score may be optimistic.\n",
    "# NOTE(review): early stopping combined with boosting_type='dart' is reportedly\n",
    "# unreliable in LightGBM - confirm against the docs of the installed version.\n",
    "clf.fit(\n",
    "    train_X,\n",
    "    train_Y,\n",
    "    eval_set=[(train_X, train_Y), (test_X, test_Y)],\n",
    "    eval_metric='logloss',\n",
    "    early_stopping_rounds=20,\n",
    "    verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy validation set:  0.902245627504\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "      anger       0.91      0.91      0.91     11550\n",
      "       fear       0.84      0.89      0.86      9455\n",
      "        joy       0.92      0.92      0.92     28299\n",
      "       love       0.77      0.88      0.82      6910\n",
      "    sadness       0.96      0.92      0.94     24111\n",
      "   surprise       0.82      0.70      0.75      3037\n",
      "\n",
      "avg / total       0.90      0.90      0.90     83362\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn import metrics\n",
    "\n",
    "predicted = clf.predict(test_X)\n",
    "# fraction of held-out samples whose predicted label equals the true label\n",
    "print('accuracy validation set: ', (predicted == test_Y).mean())\n",
    "\n",
    "# per-class precision, recall and F1\n",
    "print(metrics.classification_report(\n",
    "    y_true=test_Y, y_pred=predicted, target_names=trainset.target_names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
